FeatureUnion 跟 Pipeline 真的很好用,可以用暴力法(網格搜尋)一次選出特徵設定與模型的參數。
以下是隨意找的一篇介紹:
https://www.kaggle.com/baghern/a-deep-dive-into-sklearn-pipelines
from sklearn.base import BaseEstimator, TransformerMixin
class RollingWindow(BaseEstimator, TransformerMixin):
    """Append rolling-window statistics of every column as extra features.

    For each column of the input frame, the statistics listed in ``_STATS``
    are computed over a rolling window of ``window`` rows and appended as
    new columns named ``<stat>_<column>`` (for example ``mean_price``).

    Parameters
    ----------
    window : int, default=5
        Rolling window size in rows. The default is only a placeholder so
        the transformer can be built without arguments; a grid search is
        expected to override it (e.g. via ``Rolling__window``).
    drop_incomplete : bool, default=False
        If False, every input row is kept and the first ``window - 1``
        warm-up rows (whose statistics are NaN) are zero-filled, so the
        number of samples is unchanged and the transformer can sit inside
        a ``Pipeline`` next to an untouched ``y``. If True, those warm-up
        rows are dropped instead; the caller must then trim ``y`` itself.
    """

    # Statistics computed over each rolling window, per input column.
    _STATS = ('sum', 'std', 'mean', 'max', 'min', 'median', 'kurt', 'skew')

    def __init__(self, window=5, drop_incomplete=False):
        # Parameters are stored unmodified so BaseEstimator.get_params /
        # set_params (used by grid search) can read and overwrite them.
        self.window = window
        self.drop_incomplete = drop_incomplete

    def transform(self, X, y=None):
        """Return ``X`` extended with rolling statistics of each column.

        ``X`` must be a pandas DataFrame (``.columns`` and ``.rolling`` are
        used). The input frame is not modified; a new frame is returned.
        ``y`` is ignored.
        """
        features = X
        for col in X.columns:
            stats = X[col].rolling(window=self.window).agg(list(self._STATS))
            # Join on the shared index and tag every statistic with its
            # source column so feature names stay unique and predictable.
            features = features.join(stats.add_suffix('_' + str(col)))

        # Remove features that could not be computed for any row (some
        # statistics are undefined when the window is very small).
        features = features.dropna(axis=1, how='all')

        if self.drop_incomplete:
            # The first window-1 rows never see a full window.
            features = features.iloc[self.window - 1:]

        # Any remaining undefined values are replaced with 0.
        return features.fillna(0)

    def fit(self, X, y=None):
        """No-op: the transformer is stateless. Returns ``self``."""
        return self
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Chain feature generation and the regressor so that one grid search can
# tune the feature setting (window size) and the model together.
pipeline = Pipeline([
    # `window` is required by RollingWindow.__init__; the value given here
    # is only a starting point and is overridden by 'Rolling__window' below.
    ('Rolling', RollingWindow(window=4)),
    ('GBR', GradientBoostingRegressor()),
])

# Grid keys follow the `<step name>__<parameter name>` convention.
param = {
    'Rolling__window': [4, 5, 6],
    'GBR__n_estimators': [100, 200, 300],
}

# Exhaustive search over the 3 x 3 combinations with 5-fold cross-validation.
# NOTE(review): rolling features suggest ordered (time-series) data; confirm
# that the default, non-time-aware 5-fold split is acceptable here.
clf = GridSearchCV(pipeline, param, cv=5)

# X_train / y_train are not defined in this snippet; presumably they are
# prepared beforehand, with X_train as a pandas DataFrame.
clf.fit(X_train, y_train)
大概是這樣,有空再寫詳細一點